**** Title: EW - SUBSET (2005) and (2012)
**** Goal:
***** 1. Create the subsets that will be used in merging
***** 2. Merge the NR (non-resident) file to identify wives of migrants
***** 3. Merge EW1 and EW2
***** 4. Create a variable identifying those with absent husbands in Wave 1 and Wave 2


*********************************
*	Setting Paths		        *
*********************************

* Set working directory to "JOP Replication files" folder on your computer 

************** PART I *************
**** Load 2005-06 EW data (this is in the HH data) ***

* Wave 1 (2005-06): the EW items are stored in the household-level file.
use "DATA FILES TO SHARE/IHDS_RAW/22626-0002-Data.dta", clear

* Keep only the identifiers and the household / EW / GR / LB / MH variables
* that are renamed or saved below.
keep CASEID STATEID DISTID PSUID HHID HHSPLITID IDHH IDPSU STATEID2 DISTNAME DIST01 ///
     URBAN METRO6 SWEIGHT COPC GROUPS8 HHASSETS HHED2 HHED5ADULT HHED5F HHED5M INCOME ///
     EW3 EW4 EW5 EW6 EW7 EW8 EW9 ///
     GR1A GR1B GR1C GR1D GR1E GR1F GR1G ///
     GR2A GR2B GR2C GR2D GR2E GR2F GR2G ///
     GR3A GR3B GR3C GR3D GR3E GR3F GR3G ///
     GR4A GR4B GR4C GR4D GR4E GR4F GR4G ///
     GR5A GR5B GR5C GR5D GR5E GR5F GR5G ///
     GR6A GR6B GR6C GR6D GR6E GR6F ///
     GR7A GR7B GR7C GR7D GR7E GR7F ///
     GR8A GR8B GR8C GR8D GR8E GR8F ///
     GR11A GR11B GR11C GR11D ///
     GR13 GR14 GR15A GR15B GR16 ///
     GR18C LB4 LB20 LB35 ///
     GR9 GR19 GR21 GR22 GR23 GR24 GR25 GR26 GR27 GR28 GR29 GR30 GR31 ///
     MH7

* Give the multi-part GR items wave-independent names so that both waves
* share one naming scheme. GR4A-GR4G are kept under their original names.

* Items with sub-parts A-G
foreach s in A B C D E F G {
    rename GR1`s' GRcook_`s'
    rename GR2`s' GRlux_`s'
    rename GR3`s' GRchild_`s'
    rename GR5`s' GRmarr_`s'
}

* Items with sub-parts A-F
foreach s in A B C D E F {
    rename GR6`s' GRhc_`s'
    rename GR7`s' GRrel_`s'
    rename GR8`s' GRshop_`s'
}

* Item with sub-parts A-D
foreach s in A B C D {
    rename GR11`s' GRmandi_`s'
}

* Household-level GR items
rename GR13  GR_hh_meal
rename GR14  GR_hh_cash
rename GR15A GR_hh_ac
rename GR15B GR_hh_acname
rename GR16  GR_hh_prop

* pol_* items
rename GR18C pol_disc
rename LB4   pol_hhworker
rename LB20  pol_immu
rename LB35  pol_icds

** update Sep 15, 2020
** Controls for norms
rename GR9  GRghunghat
rename GR19 GRnatalfam
rename GR21 GRwifebeat_A
rename GR22 GRwifebeat_B
rename GR23 GRwifebeat_C
rename GR24 GRwifebeat_D
rename GR25 GRwifebeat_E
rename GR26 GRwidows
rename GR31 GRharass
rename GR27 GRgirlsold_A
rename GR28 GRgirlsold_B
rename GR29 GRgirlsold_C
rename GR30 GRgirlsold_D
rename MH7  caste_marr

save "DATA FILES TO SHARE/TEMP_FILES/ew_subset_05.dta", replace

**** Wave 2 (2011-12): load the EW data and build the matching subset ***
use "DATA FILES TO SHARE/IHDS_RAW/36151-0003-Data.dta", clear

* Keep only the identifiers and the EW / GR / LB / MH variables that are
* renamed or saved below (each variable listed once).
keep SURVEY STATEID DISTID PSUID HHID HHSPLITID PERSONID IDPSU IDHH IDPERSON GROUPS INCOME ///
     EW5 EW6 EW7Y EW8 EW9 EW10 EW11 ///
     GR1A GR1B GR1C GR1D GR1E GR1F GR1G ///
     GR2A GR2B GR2C GR2D GR2E GR2F GR2G ///
     GR3A GR3B GR3C GR3D GR3E GR3F GR3G ///
     GR4A GR4B GR4C GR4D GR4E GR4F GR4G ///
     GR8A GR8B GR8C GR8D GR8E GR8F GR8G ///
     GR9A GR9B GR9C GR9D GR9E GR9F ///
     GR10A GR10B GR10C GR10D GR10E GR10F ///
     GR11A GR11B GR11C GR11D GR11E GR11F ///
     GR23A GR23B GR23C GR23D ///
     GR25 GR26 GR27A GR27B GR28 ///
     GR46 GR46A GR46B GR47 GR48 GR49 ///
     GR18A GR18B GR18C GR18D GR19 GR29C LB8 LB33 LB48 ///
     GR20 GR30 GR31 GR32 GR34 GR35 GR36 GR37 GR38 GR39 GR40 ///
     GR41 GR42 GR43 GR44 GR45 ///
     MH7

* Map the wave-2 item numbers onto the same wave-independent names used for
* the 2005-06 subset. GR4A-GR4G are kept under their original names.

* Items with sub-parts A-G
foreach s in A B C D E F G {
    rename GR1`s' GRcook_`s'
    rename GR2`s' GRlux_`s'
    rename GR3`s' GRchild_`s'
    rename GR8`s' GRmarr_`s'
}

* Items with sub-parts A-F
foreach s in A B C D E F {
    rename GR9`s'  GRhc_`s'
    rename GR10`s' GRrel_`s'
    rename GR11`s' GRshop_`s'
}

* Item with sub-parts A-D
foreach s in A B C D {
    rename GR23`s' GRmandi_`s'
}

* Household-level GR items
rename GR25  GR_hh_meal
rename GR26  GR_hh_cash
rename GR27A GR_hh_ac
rename GR27B GR_hh_acname
rename GR28  GR_hh_prop

* emp_* items
rename GR46  emp_wage
rename GR46A emp_nrega
rename GR46B emp_nregacurr
rename GR47  emp_say
rename GR48  emp_unem
rename GR49  emp_unemp2

* pol_* items
rename GR18A pol_mem_mm
rename GR18B pol_mem_shg
rename GR18C pol_mem_cred
rename GR18D pol_mem_porg
rename GR29C pol_disc
rename GR19  pol_meet
rename LB8   pol_hhworker
rename LB33  pol_immu
rename LB48  pol_icds

* Same name as the corresponding variable in the 2005-06 subset
rename GROUPS GROUPS8

** update Sep 15, 2020
** Controls for norms
* NOTE(review): GR34-GR39 map to wifebeat suffixes A, E, B, C, F, D in that
* order, presumably to line up with the wave-1 items; confirm against the codebooks.
rename GR20 GRghunghat
rename GR30 GRnatalfam
rename GR31 GRnatalfam_they
rename GR32 GRnatalfam_phone
rename GR34 GRwifebeat_A
rename GR35 GRwifebeat_E
rename GR36 GRwifebeat_B
rename GR37 GRwifebeat_C
rename GR38 GRwifebeat_F
rename GR39 GRwifebeat_D
rename GR40 GRwidows
rename GR45 GRharass
rename GR41 GRgirlsold_A
rename GR42 GRgirlsold_B
rename GR43 GRgirlsold_C
rename GR44 GRgirlsold_D
rename MH7  caste_marr

save "DATA FILES TO SHARE/TEMP_FILES/ew_subset_12.dta", replace


************ PART II ************************
***********************************************
****** MERGE NON_RES WITH EW TO ID WIVES OF ABS HUSBANDS ********
*** Load the Non-Resident (NR) file 2011-12, keeping only the rows that
*** say they are "spouses" (NR4 == 2)
use if NR4 == 2 using "DATA FILES TO SHARE/IHDS_RAW/36151-0007-Data.dta", clear

** NR2 is the ID of the non-resident's contact person in the HH data, i.e. the
** EW that the "spouse" relationship refers to. Rename it to PERSONID so it can
** serve as the person-level merge key against the EW subset.
rename NR2 PERSONID

*** Merge with the wave-2 EW subset.
*** keep(using match) discards NR rows that have no EW record (_merge == 1);
*** the merge indicator is stored as merge_nr_ew_w2, where 3 marks an EW with a
*** matching NR spouse row and 2 marks an EW without one.
merge 1:1 STATEID DISTID PSUID HHID HHSPLITID PERSONID ///
    using "DATA FILES TO SHARE/TEMP_FILES/ew_subset_12.dta", ///
    keep(using match) generate(merge_nr_ew_w2)

** Author's note: 2227 NR rows have matching IDs in the EW file, i.e. they have
** wives who were surveyed in the 2nd wave.

save "DATA FILES TO SHARE/TEMP_FILES/ew2_nr.dta", replace


** Load the wave-1 EW subset and add PERSONID (the EW's roster ID, taken from
** EW3) so the file can be merged with the NR file on a person-level key.
use "DATA FILES TO SHARE/TEMP_FILES/ew_subset_05.dta", clear

* This block overwrites its own input file. If it is re-run without first
* re-running PART I, PERSONID already exists and -gen- would stop with an
* error, so remove any earlier copy before rebuilding it.
capture drop PERSONID
gen PERSONID = EW3
recast int PERSONID, force

* Drop rows without a usable EW ID. Negative codes mean "no EW3" (the author
* notes this removes 8,057 obs). Stata sorts missing values ABOVE every number,
* so PERSONID < 0 alone would silently keep system-missing IDs; those could
* later pair up with other missing-ID rows when used as a merge key.
drop if PERSONID < 0 | missing(PERSONID)

save "DATA FILES TO SHARE/TEMP_FILES/ew_subset_05.dta", replace

*** Load the Non-Resident (NR) file 2005-06
use "DATA FILES TO SHARE/IHDS_RAW/22626-0004-Data.dta", clear

*** PERSONID in this file is the ID of the non-resident themselves (like NR0 in
*** the 2011-12 wave). Rename it so it cannot be confused with the EW's ID.
rename PERSONID PERSONID_NR

*** Keep only the non-residents recorded as "spouses"
keep if NR4 == 2

** NR2 is the ID of the non-resident's contact person in the HH data, i.e. the
** EW that NR4 == 2 refers to. The EW file calls this PERSONID, so rename it to
** make the merge key match.
rename NR2 PERSONID

** The 1:1 merge needs one row per key. The author notes two duplicate rows
** (leaving N = 1357). Drop duplicates directly on the key variables: the
** earlier egen group() + duplicates drop id route assigns a MISSING id to every
** row with a missing key, and would then collapse all such rows into a single
** observation. -force- keeps the first row of each duplicated key.
duplicates drop STATEID DISTID PSUID HHID HHSPLITID PERSONID, force

merge 1:1 STATEID DISTID PSUID HHID HHSPLITID PERSONID using "DATA FILES TO SHARE/TEMP_FILES/ew_subset_05.dta"

/*

    Result                      Number of obs
    -----------------------------------------
    Not matched                        33,340
        from master                       600  (_merge==1)
        from using                     32,740  (_merge==2)

    Matched                               757  (_merge==3)
    -----------------------------------------


*/

* Discard NR rows with no EW record. What remains is every wave-1 EW, with
* merge_nr_ew_w1 == 3 when a matching NR spouse row exists and == 2 otherwise.
drop if _merge == 1
rename _merge merge_nr_ew_w1
save "DATA FILES TO SHARE/TEMP_FILES/ew1_nr.dta", replace

**************** PART III ******************************
************** Merging EW1 and EW2 based on the link files *******************


*** Load the EW link file and attach the wave-2 EW + NR data to it
use "DATA FILES TO SHARE/IHDS_RAW/linkew.dta", clear

sort STATEID DISTID PSUID HHID HHSPLITID PERSONID

** Merge with the EW 2 file from IHDS 2; the merge indicator is stored
** directly as _mergeR2link.
merge 1:1 STATEID DISTID PSUID HHID HHSPLITID PERSONID ///
    using "DATA FILES TO SHARE/TEMP_FILES/ew2_nr.dta", generate(_mergeR2link)

  /*Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                            39,523  (_merge==3)
    -----------------------------------------

*/

** Order the rows by the 2005 household identifiers before saving
sort STATEID DISTID PSUID HHID2005 HHSPLITID2005 PERSONID

** Save this as EW 2
save "DATA FILES TO SHARE/TEMP_FILES/ew2_link.dta", replace

** Load the round-1 EW + NR file (round-1 EW data come from the HH file)
use "DATA FILES TO SHARE/TEMP_FILES/ew1_nr.dta", clear

** Build the 2005 person key from EW3 and drop rows with a negative code
gen PERSONID2005 = EW3
recast int PERSONID2005, force
drop if PERSONID2005 < 0

** Wave-1 household identifiers get the 2005 suffix used as merge keys below
rename (HHID HHSPLITID) (HHID2005 HHSPLITID2005)

** Prefix every wave-1 variable with x so that same-named wave-2 variables
** are not overwritten by the merge ...
rename * x*

** ... then restore the merge keys to their original names, otherwise the
** merge cannot find them
foreach key in STATEID DISTID PSUID HHID2005 HHSPLITID2005 PERSONID2005 {
    rename x`key' `key'
}

** 1:m merge: one wave-1 row may pair with several rows of the wave-2 link file
merge 1:m STATEID DISTID PSUID HHID2005 HHSPLITID2005 PERSONID2005 ///
    using "DATA FILES TO SHARE/TEMP_FILES/ew2_link.dta", generate(_mergeR1R2link)

/*

   Result                      Number of obs
    -----------------------------------------
    Not matched                        22,062
        from master                     8,018  (_merge==1)
        from using                     14,044  (_merge==2)

    Matched                            25,479  (_merge==3)
    -----------------------------------------

*/

sort STATEID DISTID PSUID HHID HHSPLITID

*** Only keep the EW that match in both waves
keep if _mergeR1R2link == 3

** Absent-husband indicators: 1 when the EW matched an NR "spouse" row in that
** wave (NR/EW merge indicator == 3), 0 otherwise
generate w1_abshusband_dummy = (xmerge_nr_ew_w1 == 3)
generate w2_abshusband_dummy = (merge_nr_ew_w2 == 3)

save "DATA FILES TO SHARE/TEMP_FILES/ew_merge.dta", replace
